* Build the final dataset

* The vaccination timeline is the master file that determines the municipality classification.
* All further sources are merged m:1 on municipality_id; -nogen- suppresses the _merge variable.
use "$edited/vaccination_rate_timeline.dta", clear

* Load prepared data from the raw folder: cases and deaths.
* keep(1 2 3) retains observations of every match status.
merge m:1 municipality_id using "$raw/goeg_cases_dead.dta", nogen keep(1 2 3) // Vienna not separated in districts

* Load prepared data from the raw folder: hospital data.
merge m:1 municipality_id using "$raw/goeg_hosp.dta", nogen

* Vaccination rate for 2021-12-14 from a different source - contains additional variables
merge m:1 municipality_id using "$raw/vaccination_rate_20211214.dta", nogen

* Population data. With -update-, missing values in the master are filled
* from the using file for variables that exist in both.
merge m:1 municipality_id using "$edited/pop_statistics.dta", nogen
merge m:1 municipality_id using "$edited/pop_5_years.dta", nogen update
merge m:1 municipality_id using "$edited/pop_female.dta", nogen update

* Income data
merge m:1 municipality_id using "$edited/salaries.dta", nogen
merge m:1 municipality_id using "$edited/pensions.dta", nogen

* Vaccine-skeptic physicians: a missing count after the merge is set to zero.
merge m:1 municipality_id using "$edited/vaccine_skeptic_doctors.dta", nogen
recode antivax_docs antivax_gps (.=0)

merge m:1 municipality_id using "$edited/doctors.dta", nogen

merge m:1 municipality_id using "$edited/urban_rural.dta", nogen
recode type_urban_rural (.=3) // Districts of Vienna

merge m:1 municipality_id using "$edited/waldorf.dta", nogen

merge m:1 municipality_id using "$edited/urban_rural_detailed.dta", nogen

* Election data: keep(3) keeps matched observations only.
merge m:1 municipality_id using "$edited/nrw_2019.dta", nogen keep(3)
merge m:1 municipality_id using "$edited/bpw_2022.dta", nogen keep(3)

* Vaccination centers: a missing value after the merge is set to zero.
merge m:1 municipality_id using "$edited/vaccination_centers.dta", nogen
recode any_vaccination_center vaccination_centers (.=0)

merge m:1 municipality_id using "$edited/municipality_geo", nogen

* District identifier: municipality_id divided by 100, truncated to an integer
generate district = int(municipality_id/100)
* District received 100.000 extra vaccine doses due to South African variant in March 2021
generate district_schwaz = (district==709)

* Variables that require the merged data.
* Both shares use the number of GPs as the denominator.
foreach grp in gps docs {
	generate sh_antivax_`grp' = antivax_`grp'/gps
}

* The division yields a share above 1 in one municipality (the original note
* names sh_antivax_docs; the caps below are applied to the GP variables only).
* Cap the GP share at 1 and the number of skeptic GPs at the number of GPs.
replace sh_antivax_gps = 1 if sh_antivax_gps>1 & !missing(sh_antivax_gps)
replace antivax_gps = gps if antivax_gps>gps & !missing(antivax_gps)

* Indicators: 1 if the count is positive and non-missing, 0 otherwise
foreach grp in docs gps {
	generate any_antivax_`grp' = antivax_`grp'>0 & !missing(antivax_`grp')
}

label variable sh_antivax_gps "Share vaccine-skeptic GPs"
label variable sh_antivax_docs "Share vaccine-skeptic physicians" // Note: this is defined as physicians/GPs
label variable antivax_gps "Number of vaccine-skeptic GPs"
label variable any_antivax_gps "Any vaccine-skeptic GPs (0/1)"
label variable any_antivax_docs "Any vaccine-skeptic physicians (0/1)"

* Inhabitants per GP
generate pop_per_gp = population/gps

* Sum of the AHS and BHS population shares
generate sh_pop_matura = sh_pop_ahs + sh_pop_bhs

* Value labels for the nine states, attached to the variable state
label define l_state 1 "Burgenland" 2 "Carinthia" 3 "Lower Austria"
label define l_state 4 "Upper Austria" 5 "Salzburg" 6 "Styria", add
label define l_state 7 "Tyrol" 8 "Vorarlberg" 9 "Vienna", add

label values state l_state


* Pre-treatment cases (in percent of pop) and deaths (per 100.000 of pop),
* based on the cumulative counts cum_cases_27dec2020 / cum_deaths_27dec2020
generate sh_cases_pre = 100*cum_cases_27dec2020/pop
generate sh_deaths_pre = 100000*cum_deaths_27dec2020/pop

* Indicator for any pre-treatment death; left missing where the rate is missing
generate any_deaths_pre = (sh_deaths_pre>0) if !missing(sh_deaths_pre)

label variable sh_cases_pre "Share pop. pre-treatment infection"
label variable sh_deaths_pre "Pre-treatment COVID19 deaths (per 100.000)"
label variable any_deaths_pre "Any prior COVID19 deaths"

* Hospital cases: missing counts are set to zero, then expressed per 100.000 of pop
recode icu_pre norm_pre (.=0)

generate sh_icu_pre = 100000 * icu_pre/pop
generate sh_norm_pre = 100000 * norm_pre/pop

* Running observation number within each municipality (in the sort order at this point).
* n is not referenced again in the visible part of this script.
bysort municipality_id: gen n=_n

* Impute these variables so that they are available for the entire observation period:
* within each municipality, sorted by date, a missing value (.) is replaced by the
* value of the municipality's first (earliest-date) observation.
foreach var of varlist sh_cases_pre sh_deaths_pre any_deaths_pre cum_deaths_27dec2020 {
	bysort municipality_id (date): replace `var' = `var'[1] if `var'==.

}

label variable sh_norm_pre "Hospital admissions pre-treatment (per 100.000)"
label variable sh_icu_pre "ICU admissions pre-treatment (per 100.000)"

* Day of the week from the date variable (dow() returns 0=Sunday, ..., 6=Saturday)
generate weekday = dow(date)

* Sum of the BMS and Lehre population shares
generate sh_pop_mededu = sh_pop_bms+sh_pop_lehre

* Rescale pensions and salaries by 1/1000 (labelled "in 1.000 EUR" further below)
foreach v of varlist pensions salaries {
	replace `v' = `v'/1000
}

* Population in thousands and its square
replace mun_pop = mun_pop/1000
generate mun_pop2 = mun_pop^2

label variable mun_pop "Number of inhabitants (in thousands)"
label variable mun_pop2 "Number of inhabitants (squared)"

* Variable labels

* Broad (15-year) age groups
label variable sh_pop_0_14 "Share pop. below 15 years"
label variable sh_pop_15_29 "Share pop. 15-29 years"
label variable sh_pop_30_44 "Share pop. 30-44 years"
label variable sh_pop_45_59 "Share pop. 45-59 years"
label variable sh_pop_60_74 "Share pop. 60-74 years"
label variable sh_pop_75_99 "Share pop. 75 years and older"

* Five-year age groups. The labels for 5-9 up to 95-99 are built from the band
* bounds, so the label text always agrees with the variable name
* (e.g. sh_pop_35_39 is labelled "35-39", not "34-39").
label variable sh_pop_0_4 "Share pop. below 5 years"
forvalues lo = 5(5)95 {
	local hi = `lo' + 4
	label variable sh_pop_`lo'_`hi' "Share pop. `lo'-`hi' years"
}

label variable sh_pop_female "Share pop. female"

* Education, income, origin
label variable sh_pop_pflichtschule "Share pop. lower secondary edu."
label variable sh_pop_mededu "Share pop. vocational edu."
label variable sh_pop_matura "Share pop. high school"
label variable sh_pop_uni "Share pop. university"
label variable salaries "Average salaries (in 1.000 EUR)"
label variable pensions "Average pension (in 1.000 EUR)"
label variable sh_pop_foreign "Share pop. foreign born"

* Elections and other municipality characteristics
label variable sh_fpo "Share votes Freedom Party (2019)"
label variable sh_grun "Share votes Green Party"
label variable turnout "Election turnout"
label variable district_schwaz "District Schwaz (0/1)"
label variable any_waldorf "Waldorf school or KiGa (0/1)"
label variable nr_waldorf "Nr. of Waldorf schools or KiGa"
label variable sh_brunner "Share votes MFG candidate (2022)"

* Value labels for the detailed urban-rural typology, defined group by group
label define l_urban_type 101 "Large urban center" 102 "Medium urban center" 103 "Small urban center"
label define l_urban_type 210 "Regional center (central)" 220 "Regional center (intermediate)", add
label define l_urban_type 310 "Suburban (close to center)" 320 "Suburban (intermediate)" 330 "Suburban (far from center)", add
label define l_urban_type 410 "Rural area (central)" 420 "Rural area (intermediate)" 430 "Rural area (remote)", add

label values urban_type_detailed l_urban_type

* One indicator variable (utd_1, utd_2, ...) per category of urban_type_detailed
tabulate urban_type_detailed, generate(utd_)

* Labels for the 2021-12-14 vaccination variables, with LaTeX markup for the ordinals.
* NOTE(review): "$" also starts a global macro reference in Stata - verify that the
* stored label text is as intended.
label variable sh_dose_1_20211214 "Share pop. with $1^{st}$ dose"
label variable sh_dose_2_20211214 "Share pop. with $2^{nd}$ dose"
label variable sh_dose_3_20211214 "Share pop. with $3^{rd}$ dose"
label variable sh_valid_cert_20211214 "Share pop. with Covid certificate"

compress
 
* Export point data from the cross-section at date==22500.
* Both files are written from one preserved copy: first all observations of the
* cross-section, then the subset with a vaccination center.
preserve

keep if date==22500 // use cross-section
keep vaccination_centers any_vaccination_center any_waldorf _CX _CY _ID any_antivax_gps antivax_gps
save "$edited/geopoints", replace

* Vaccination centers: identifiers and coordinates only
keep if any_vaccination_center==1
keep _CX _CY _ID
save "$edited/vaccine_center_location", replace

restore

* Spatial lags: computed on the cross-section at date==22500 and merged back
* onto the full data by municipality_id.
preserve
keep if date==22500 // use cross-section

* Spatial weighting matrices: contiguity and inverse distance.
* NOTE(review): spmatrix needs Sp-set data; presumably the Sp settings
* (_ID, _CX, _CY) arrive with municipality_geo - confirm.
spmatrix create contiguity contiguity_matrix, replace
spmatrix create idistance inverse_distance_matrix, replace

* wn_*  = contiguity-weighted spatial lag, wid_* = inverse-distance-weighted spatial lag
foreach var of varlist sh_dose_1_20211214 sh_dose_2_20211214 sh_dose_3_20211214 sh_valid_cert_20211214 sh_antivax_gps antivax_gps any_antivax_gps gps any_vaccination_center any_waldorf {
	spgenerate wn_`var' = contiguity_matrix*`var'
	spgenerate wid_`var' = inverse_distance_matrix*`var'
}

* "Near" = present in the municipality itself or a positive contiguity lag.
* Stata treats missing as larger than any number, so an unguarded ">0" is true
* for a missing lag; the "<." guard keeps a missing lag from counting as "near".
gen near_vaccination_center = any_vaccination_center==1 | (wn_any_vaccination_center>0 & wn_any_vaccination_center<.)
gen near_waldorf = any_waldorf==1 | (wn_any_waldorf>0 & wn_any_waldorf<.)

lab var near_vaccination_center "Vaccination center near (0/1)"
lab var near_waldorf "Waldorf school or KiGa near (0/1)"

keep municipality_id wn_* wid_* near_*

* Store the municipality-level results in a temporary file
tempfile t
save `t'

restore 

* Attach the spatial variables to every observation of the municipality
merge m:1 municipality_id using `t', nogen

* Final clean-up: drop the variable name, move the identifying variables and
* all sh_* variables to the front, reduce storage types and save the result.
drop name

local lead_vars municipality_id municipality_name mun_pop state district district_schwaz date dh weekday
order `lead_vars' sh_*

compress

save "$edited/final_data", replace

